In [342]:
suppressPackageStartupMessages({
    library(Seurat, quietly = T)
    library(monocle3, quietly = T)
    library(SeuratWrappers, quietly = T)
    
    library(WGCNA, quietly = T)
    library(hdWGCNA, quietly = T)
    
    library(clusterProfiler)
    
    library(dplyr, quietly = T)
    library(tidyr, quietly = T)
    library(magrittr, quietly = T)
    library(tibble, quietly = T)
    library(reshape2, quietly = T)
    library(stringr)
    library(plyr, quietly = T)
    library(textshape, quietly = T)
    
    library(ggplot2, quietly = T)
    library(cowplot, quietly = T)
    library(RColorBrewer, quietly = T)
    library(patchwork)
})
In [2]:
server = 'mando'
if (server == 'jabba'){
    data_path = '/data3/hratch/norcross_abc/'
}else if (server == 'mando'){
    data_path = '/data/hratch/norcross_abc/'
}
In [ ]:
n.cores<-20

seed<-888
set.seed(12345)
# optionally enable multithreading
WGCNA::enableWGCNAThreads(nThreads = n.cores)

Associated tutorials:

  1. https://cole-trapnell-lab.github.io/monocle3/docs/trajectories/
  2. http://htmlpreview.github.io/?https://github.com/satijalab/seurat-wrappers/blob/master/docs/monocle3.html
  3. https://stuartlab.org/signac/articles/monocle.html
In [3]:
# Load and format T cell data to get just the CD8s
In [35]:
abc.tcells<-readRDS(paste0(data_path, 'processed/abc_tcells.RDS'))
Idents(abc.tcells)<-'Cell.Type.Level2'
In [45]:
abc.tcells
An object of class Seurat 
20723 features across 14214 samples within 2 assays 
Active assay: integrated (2000 features, 2000 variable features)
 1 other assay present: RNA
 2 dimensional reductions calculated: pca, umap

Subset to just the CD8+ T cells

In [107]:
abc.cd8s.all<-abc.tcells[, unname(sapply(as.character(abc.tcells$Cell.Type.Level2), function(x) startsWith(x, 'CD8+')))]

We want to place the root node at the CD8 naive T-cells Looking at the UMAP, we can see there is a main group of CD8+ naive T-cells (clusters 30, 41, and 43) and then an additional cluster (cluster 44) that is transcriptionally distinct in UMAP space. Accordingly, we would like to set the root node based on the main cluster:

In [6]:
h_ = 5
w_ = 12
options(repr.plot.height=h_, repr.plot.width=w_)

g1A<-DimPlot(abc.cd8s.all)
g1B<-DimPlot(abc.cd8s.all, group.by = 'Seurat.Clusters.Level2')
g1<-cowplot::plot_grid(g1A, g1B, ncol = 2)
g1
In [7]:
h_ = 10
w_ = 20
options(repr.plot.height=h_, repr.plot.width=w_)

g2A<-DimPlot(abc.cd8s.all, split.by = 'orig.ident')
g2B<-DimPlot(abc.cd8s.all, group.by = 'Seurat.Clusters.Level2', split.by = 'orig.ident')
g2<-cowplot::plot_grid(g2A, g2B, ncol = 1)
g2
In [8]:
md<-abc.cd8s.all@meta.data

md[['Root.Node']]<-'no'
md[(md$Cell.Type.Level2 == 'CD8+ TN') & (md$Seurat.Clusters.Level2 != '44'), 'Root.Node']<-'yes'
abc.cd8s.all@meta.data<-md

Do the trajectory analysis:

In [10]:
# a helper function to identify the root principal points:
get_earliest_principal_node <- function(cds, root.node="yes"){
  cell_ids <- which(colData(cds)[, "Root.Node"] == root.node)
  
  closest_vertex <-
  cds@principal_graph_aux[["UMAP"]]$pr_graph_cell_proj_closest_vertex
  closest_vertex <- as.matrix(closest_vertex[colnames(cds), ])
  root_pr_nodes <-
  igraph::V(principal_graph(cds)[["UMAP"]])$name[as.numeric(names
  (which.max(table(closest_vertex[cell_ids,]))))]
  
  root_pr_nodes
}

Run trajectory analysis on all samples simultaneously rather than each one separately:

In [11]:
abc.cd8s.all.cds<-SeuratWrappers::as.cell_data_set(abc.cd8s.all)
abc.cd8s.all.cds<-monocle3::cluster_cells(cds = abc.cd8s.all.cds)
abc.cd8s.all.cds<-monocle3::learn_graph(cds = abc.cd8s.all.cds)#, use_partition = F) 
abc.cd8s.all.cds <- order_cells(abc.cd8s.all.cds, root_pr_nodes=get_earliest_principal_node(abc.cd8s.all.cds))

saveRDS(abc.cd8s.all.cds, paste0(data_path, 'processed/', 'cd8s_monocole_obj.rds'))
  |======================================================================| 100%
In [14]:
saveRDS(abc.cd8s.all.cds, paste0(data_path, 'processed/', 'cd8s_monocole_obj.rds'))

white circles are roots, gray circles are leaves, blue circles are branch points:

In [13]:
h_ = 6
w_ = 12
options(repr.plot.height=h_, repr.plot.width=w_)

g1A<-DimPlot(abc.cd8s.all) + ggtitle('All Conditions')
g1B<-plot_cells(cds = abc.cd8s.all.cds, color_cells_by = "pseudotime", show_trajectory_graph = TRUE, 
                   trajectory_graph_segment_size = 1, trajectory_graph_color = "black",
                   graph_label_size = 3,
              label_cell_groups=FALSE, label_roots = TRUE,
               label_leaves=TRUE,
               label_branch_points=TRUE)
g1<-g1A+g1B

for (ext in c('.svg', '.png', '.pdf')){
    fn<-paste0(data_path, 'figures/', 'cd8_trajectory_all', ext)
    ggsave(fn, g1, height = h_, width = w_)}

g1
In [15]:
traj.plots<-list()
conditions <- unique(abc.cd8s.all.cds$orig.ident)
for (i in seq_along(conditions)){
    g1<-DimPlot(abc.cd8s.so[[i]]) + ggtitle(paste0('Condition: ', conditions[[i]]))
    
#     if (i != length(abc.cd8s.all.cds)){
#         g1<-g1 + theme(legend.position="none")
#     }
    
    g2<-plot_cells(cds = abc.cd8s.all.cds[, abc.cd8s.all.cds$orig.ident == conditions[[i]]], 
                   color_cells_by = "pseudotime", show_trajectory_graph = TRUE, 
                   trajectory_graph_segment_size = 1, trajectory_graph_color = "black",
                   graph_label_size = 3,
              label_cell_groups=FALSE, label_roots = TRUE,
               label_leaves=TRUE,
               label_branch_points=TRUE)
    g<-cowplot::plot_grid(g1, g2, ncol = 1)#g1 + g2
    traj.plots[[conditions[[i]]]]<-g
}
In [16]:
h_ = 12
w_ = 35
options(repr.plot.height=h_, repr.plot.width=w_)

g2<-cowplot::plot_grid(plotlist=traj.plots, ncol = length(traj.plots))

for (ext in c('.svg', '.png', '.pdf')){
    fn<-paste0(data_path, 'figures/', 'cd8_trajectory_all_bycondition', ext)
    ggsave(fn, g2, height = h_, width = w_)}

g2

Analysis of DE/Enriched Genes in Trajectories¶

from: https://cole-trapnell-lab.github.io/monocle3/docs/differential/#branches

For branches points, we take cells a little before (psuedotime-wise) the branch point, and cells from each path a little after the branch point.

For paths between nodes, we just take the cells along the trajectory immediately after node 1 (assuming node 1 is a branch) to immediately before node 2 if node 2 is a branch, or cells radially sorrounding node 2 if node 2 is a terminal state.

These regions are selected visually on the UMAP

In [265]:
abc.cd8s.all.cds <- readRDS(paste0(data_path, 'processed/', 'cd8s_monocole_obj.rds'))
abc.cd8s.all.cds <- estimate_size_factors(abc.cd8s.all.cds) # size factors needed for branching analysis
In [266]:
get_subsets<-function(cds, branch_umap, branch_umap_exclude = NULL){
    umap.coords<-reducedDims(abc.cd8s.all.cds)[['UMAP']]
    
    if (!(is.null(branch_umap_exclude))){
        cell.ids.exclude<-c()
        for (bue in branch_umap_exclude){
            cond1 <- (umap.coords[, 1] >= bue$UMAP_1[1])
            cond2 <- (umap.coords[, 1] <= bue$UMAP_1[2])
            cond3 <- (umap.coords[, 2] >= bue$UMAP_2[1])
            cond4 <- (umap.coords[, 2] <= bue$UMAP_2[2])
            cell.ids.exclude<-c(cell.ids.exclude, 
                                rownames(umap.coords[cond1 & cond2 & cond3 & cond4,]))
        }
    }

    cond1 <- (umap.coords[, 1] >= branch_umap$UMAP_1[1])
    cond2 <- (umap.coords[, 1] <= branch_umap$UMAP_1[2])
    cond3 <- (umap.coords[, 2] >= branch_umap$UMAP_2[1])
    cond4 <- (umap.coords[, 2] <= branch_umap$UMAP_2[2])
    
    if (!(is.null(branch_umap_exclude))){
        cond5 <- (!(rownames(umap.coords) %in% cell.ids.exclude))
        all.conds <- cond1 & cond2 & cond3 & cond4 & cond5
    }else{
        all.conds <- cond1 & cond2 & cond3 & cond4
    }
    
    cell.ids<-rownames(umap.coords[all.conds,])
    
    return(cell.ids)
}

We will visually select cell subsets according to UMAP coordinates to analyze branches/trajectories of interest:

In [267]:
branch.cells.list<-list()

Branch 1:

In [268]:
branch.label<-'Branch1.cells'
branch_umap<-list('UMAP_1' = c(-4.25, -2), 'UMAP_2' = c(-2, 0.3))
branch_umap_exclude<-list(list('UMAP_1' = c(-4.25, -3.5), 'UMAP_2' = c(-2, -1)), 
                          list('UMAP_1' = c(-3.5, -2), 'UMAP_2' = c(-2, -1.5)))


branch.cells<-get_subsets(abc.cd8s.all.cds, branch_umap, branch_umap_exclude)
colData(abc.cd8s.all.cds)[[branch.label]]<-'no'
colData(abc.cd8s.all.cds)[branch.cells, branch.label]<-'yes'
colData(abc.cd8s.all.cds)[[branch.label]] <- factor(colData(abc.cd8s.all.cds)[[branch.label]], levels = c('yes', 'no'))

branch.cells.list[[branch.label]]<-branch.cells
In [269]:
h_ = 6
w_ = 6
options(repr.plot.height=h_, repr.plot.width=w_)

g.branch<-monocle3::plot_cells(cds = abc.cd8s.all.cds, color_cells_by = branch.label, show_trajectory_graph = TRUE, 
                   trajectory_graph_segment_size = 1, trajectory_graph_color = "black",
                   graph_label_size = 3,
              label_cell_groups=FALSE, label_roots = TRUE,
               label_leaves=TRUE,
               label_branch_points=TRUE) + ggtitle(branch.label)
g.branch

Branch 3

In [272]:
branch.label<-'Branch3.cells'
branch_umap<-list('UMAP_1' = c(-2.25, 1), 'UMAP_2' = c(-1.75, 0.5))
branch_umap_exclude<-NULL#list(list('UMAP_1' = c(-2.75, -1.75), 'UMAP_2' = c(-2.25, -0.25)))


branch.cells<-get_subsets(abc.cd8s.all.cds, branch_umap, branch_umap_exclude)
colData(abc.cd8s.all.cds)[[branch.label]]<-'no'
colData(abc.cd8s.all.cds)[branch.cells, branch.label]<-'yes'
colData(abc.cd8s.all.cds)[[branch.label]] <- factor(colData(abc.cd8s.all.cds)[[branch.label]], levels = c('yes', 'no'))

branch.cells.list[[branch.label]]<-branch.cells
In [273]:
h_ = 6
w_ = 6
options(repr.plot.height=h_, repr.plot.width=w_)

g.branch<-plot_cells(cds = abc.cd8s.all.cds, color_cells_by = branch.label, show_trajectory_graph = TRUE, 
                   trajectory_graph_segment_size = 1, trajectory_graph_color = "black",
                   graph_label_size = 3,
              label_cell_groups=FALSE, label_roots = TRUE,
               label_leaves=TRUE,
               label_branch_points=TRUE) + ggtitle(branch.label)
g.branch

Path 1A: from branch 3 to terminal state 3

In [274]:
branch.label<-'Path1A.cells'
branch_umap<-list('UMAP_1' = c(-1.25, 1.5), 'UMAP_2' = c(-4, -1))
branch_umap_exclude<-NULL#list(list('UMAP_1' = c(-2.75, -1.75), 'UMAP_2' = c(-2.25, -0.25)))


branch.cells<-get_subsets(abc.cd8s.all.cds, branch_umap, branch_umap_exclude)
colData(abc.cd8s.all.cds)[[branch.label]]<-'no'
colData(abc.cd8s.all.cds)[branch.cells, branch.label]<-'yes'
colData(abc.cd8s.all.cds)[[branch.label]] <- factor(colData(abc.cd8s.all.cds)[[branch.label]], levels = c('yes', 'no'))

branch.cells.list[[branch.label]]<-branch.cells
In [275]:
h_ = 6
w_ = 6
options(repr.plot.height=h_, repr.plot.width=w_)

g.branch<-plot_cells(cds = abc.cd8s.all.cds, color_cells_by = branch.label, show_trajectory_graph = TRUE, 
                   trajectory_graph_segment_size = 1, trajectory_graph_color = "black",
                   graph_label_size = 3,
              label_cell_groups=FALSE, label_roots = TRUE,
               label_leaves=TRUE,
               label_branch_points=TRUE) + ggtitle(branch.label)
g.branch

Path 1B: from branch 3 to branch 4

In [276]:
branch.label<-'Path1B.cells'
branch_umap<-list('UMAP_1' = c(-2.25, 0), 'UMAP_2' = c(-1, 2.25))
branch_umap_exclude<-list(list('UMAP_1' = c(-2.25, -1.75), 'UMAP_2' = c(-1, -0.25)), 
                         list('UMAP_1' = c(-2.25, -1.25), 'UMAP_2' = c(1.75, 2.25)))


branch.cells<-get_subsets(abc.cd8s.all.cds, branch_umap, branch_umap_exclude)
colData(abc.cd8s.all.cds)[[branch.label]]<-'no'
colData(abc.cd8s.all.cds)[branch.cells, branch.label]<-'yes'
colData(abc.cd8s.all.cds)[[branch.label]] <- factor(colData(abc.cd8s.all.cds)[[branch.label]], levels = c('yes', 'no'))

branch.cells.list[[branch.label]]<-branch.cells
In [277]:
h_ = 6
w_ = 6
options(repr.plot.height=h_, repr.plot.width=w_)

g.branch<-plot_cells(cds = abc.cd8s.all.cds, color_cells_by = branch.label, show_trajectory_graph = TRUE, 
                   trajectory_graph_segment_size = 1, trajectory_graph_color = "black",
                   graph_label_size = 3,
              label_cell_groups=FALSE, label_roots = TRUE,
               label_leaves=TRUE,
               label_branch_points=TRUE) + ggtitle(branch.label)
g.branch

Path 2: from branch 4 to terminal state 2

In [278]:
branch.label<-'Path2.cells'
branch_umap<-list('UMAP_1' = c(0.25, 10), 'UMAP_2' = c(-1.5, 5))
branch_umap_exclude<-list(list('UMAP_1' = c(0.25, 0.75), 'UMAP_2' = c(-2.25, 1.5)), 
                         list('UMAP_1' = c(0.25, 4), 'UMAP_2' = c(-2.25, 1)))


branch.cells<-get_subsets(abc.cd8s.all.cds, branch_umap, branch_umap_exclude)
colData(abc.cd8s.all.cds)[[branch.label]]<-'no'
colData(abc.cd8s.all.cds)[branch.cells, branch.label]<-'yes'
colData(abc.cd8s.all.cds)[[branch.label]] <- factor(colData(abc.cd8s.all.cds)[[branch.label]], levels = c('yes', 'no'))

branch.cells.list[[branch.label]]<-branch.cells
In [279]:
h_ = 6
w_ = 6
options(repr.plot.height=h_, repr.plot.width=w_)

g.branch<-plot_cells(cds = abc.cd8s.all.cds, color_cells_by = branch.label, show_trajectory_graph = TRUE, 
                   trajectory_graph_segment_size = 1, trajectory_graph_color = "black",
                   graph_label_size = 3,
              label_cell_groups=FALSE, label_roots = TRUE,
               label_leaves=TRUE,
               label_branch_points=TRUE) + ggtitle(branch.label)
g.branch
In [310]:
q.val.thresh<-0.1

metascape_input = list()
# note, CDS only uses the 2000 HVGs
metascape_input[['_BACKGROUND']]<-rownames(abc.cd8s.all.cds) # the universe

der.branches<-list()
for (branch.label in names(branch.cells.list)){
    branch.cells<-branch.cells.list[[branch.label]]
    
    # do the DE analysis (on the 2k HVGs in the integrated counts)
    # doing on integrated counts is fine bc unlike transcriptional analysis script 4, there is no controlling 
    # for batch in this moran's I "graph_test"
    abc.cd8s.branch.cds<-abc.cd8s.all.cds[, branch.cells]
    der.branch <- graph_test(abc.cd8s.branch.cds, neighbor_graph="principal_graph", cores=n.cores)
    der.branch <- der.branch[(der.branch$status == 'OK'),]# & (der.branch$q_value <= q.val.thresh),]
    der.branch<-der.branch[with(der.branch, order(-abs(morans_I), q_value)), ]
    der.branch[['Condition']]<-branch.label
    
    der.branch[['gene.id']]<-rownames(der.branch)
    der.branches[[branch.label]]<-der.branch
    
    metascape_input[[branch.label]]<-rownames(der.branch[der.branch$q_value <= q.val.thresh,])
    
    # de.genes<-rownames(der.branches)

    # # run coexpression analysis on 
    # # run on all HVGs, not just the identified DE genes, otherwise too few modules are identified 
    # wgcna.branch<-abc.cd8s.all[,branch.cells] # subset to branch

    # # remove all 0 genes
    # expr<-wgcna.branch@assays$RNA@data
    # genes.to.keep<-rownames(expr[rowSums(wgcna.branch[])>0, ])
    # wgcna.branch<-wgcna.branch[genes.to.keep, ] 

    # wgcna.branch <- hdWGCNA::SetupForWGCNA(seurat_obj =  wgcna.branch, 
    #                                 gene_select = 'custom', gene_list = de.genes,
    # #                               gene_select = 'variable',
    #                                 wgcna_name = branch.label)
    # wgcna.branch <- hdWGCNA::SetDatExpr(seurat_obj = wgcna.branch,
    #                                     group_name = as.character(unique(abc.cd8s.branch.so$Cell.Type.Level2)), 
    #                                     group.by='Cell.Type.Level2', assay = 'RNA', slot = 'data')
    # wgcna.branch <- hdWGCNA::TestSoftPowers(wgcna.branch, networkType = 'signed')
    # wgcna.branch <- hdWGCNA::ConstructNetwork(wgcna.branch, setDatExpr=FALSE, overwrite_tom = TRUE,
    #                                soft_power=NULL, # automated selection
    #                                )
    # modules <- GetModules(wgcna.branch)

}

# format input to metascape
max.length <- max(sapply(metascape_input, length))
metascape_input <- lapply(metascape_input, function(v) { c(v, rep(NA, max.length-length(v)))})
metascape_input<-do.call(cbind, metascape_input)
write.csv(metascape_input, paste0(data_path, 'interim/', 'traj_analysis_metascape_input.csv'), row.names=FALSE)
  |=======================================================| 100%, Elapsed 00:03
  |=======================================================| 100%, Elapsed 00:03
  |=======================================================| 100%, Elapsed 00:02
  |=======================================================| 100%, Elapsed 00:03
  |=======================================================| 100%, Elapsed 00:05
In [414]:
de.res<-do.call("rbind", der.branches)
de.res<-de.res[de.res$q_value <= q.val.thresh, ]
write.csv(de.res, paste0(data_path, 'processed/', 'traj_analysis_de.csv'))

No. of differentially expressed genes per condition (note, only the 2k HVGs are run in trajectory analysis):

In [415]:
table(de.res$Condition)
Branch1.cells Branch3.cells  Path1A.cells  Path1B.cells   Path2.cells 
           94           158           145           141            68 

Metascape Enrichment of Trajectory-Enriched DE Genes¶

Run metascape on the DE genes (q <= 0.1) on all conditions at once and visualize output below:

In [331]:
format.ms<-function(ms, val_col){
    val_df<-as.data.frame(pivot_wider(data = ms, id_cols = 'GeneList', names_from = 'Description', 
                                      values_from = val_col, 
                                    values_fn = mean))
    val_df<-t(column_to_rownames(x = val_df, loc = 1))

    return(val_df)
}
In [332]:
ms.top20<-read.csv(paste0(data_path, 'interim/', 'traj_analysis_metascape_out/Enrichment_heatmap/HeatmapSelectedGO.csv'))
ms.top100<-read.csv(paste0(data_path, 'interim/', 'traj_analysis_metascape_out/Enrichment_heatmap/HeatmapSelectedGOTop100.csv'))
ms<-read.csv(paste0(data_path, 'interim/', 'traj_analysis_metascape_out/Enrichment_GO/GO_AllLists.csv'))

First, let's see if there is any duplicate Description/GeneList combinations:

In [335]:
ms %>%
  dplyr::group_by(GeneList, Description) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  dplyr::filter(n > 1L)
A tibble: 0 × 3
GeneListDescriptionn
<chr><chr><int>

There are no duplicate terms

In [338]:
freq<-format.ms(ms = ms, val_col = 'X.InGO')[ms.top20$Description, ]
pvals<-format.ms(ms = ms, val_col = 'Log.q.value.')[ms.top20$Description, ]

pvals<-as.data.frame(pvals)
pvals[['Enrichment.Term']]<-rownames(pvals)
freq<-as.data.frame(freq)
freq[['Enrichment.Term']]<-rownames(freq)

pvals<-melt(pvals, id.vars = 'Enrichment.Term', value.name = 'log10p', variable.name = 'Condition')
freq<-melt(freq, id.vars = 'Enrichment.Term', value.name = 'Frequency', variable.name = 'Condition')
pvals[['log10p']] <- -as.numeric(pvals[['log10p']]) 

viz.df <- cbind(pvals, Frequency = as.numeric(freq$Frequency))
In [394]:
h_ = 10
w_ = 25
options(repr.plot.height=h_, repr.plot.width=w_)

green_hex = brewer.pal(n = 11, name ='RdYlGn')[[11]]
yellow_hex = brewer.pal(n = 11, name = 'RdYlGn')[[6]]
red_hex = brewer.pal(n = 11, name = 'RdYlGn')[[1]]

lower_q = min(viz.df$log10p[!(is.na(viz.df$log10p))])
upper_q = max(viz.df$log10p[!(is.na(viz.df$log10p))])
middle_q = mean(c(lower_q, upper_q)) # median(viz.df$log10p[!(is.na(viz.df$log10p))]) # 
# middle_q = 7.5

g<-ggplot(data = viz.df, aes(x = Condition, y = Enrichment.Term, color = log10p, size = Frequency)) + 
geom_point() + 
# scale_color_gradient2(low = green_hex, mid = yellow_hex, high = red_hex, name = '-log10(q-val)', 
#                      limits = c(lower_q, upper_q), midpoint = middle_q) + 
scale_colour_gradientn(colours = rev(c("darkred", "orange", "yellow")), name = '-log10(q-val)')+
scale_size_continuous(range = c(1,15)) +
theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
                  text = element_text(size = 20))


for (ext in c('.svg', '.png', '.pdf')){
    fn<-paste0(data_path, 'figures/', 'traj_analysis_metascape_enrichment_dotplot', ext)
    ggsave(fn, g, height = h_, width = w_)}

g
Warning message:
“Removed 59 rows containing missing values (`geom_point()`).”
Warning message:
“Removed 59 rows containing missing values (`geom_point()`).”
Warning message:
“Removed 59 rows containing missing values (`geom_point()`).”
Warning message:
“Removed 59 rows containing missing values (`geom_point()`).”
In [398]:
freq<-format.ms(ms = ms, val_col = 'X.InGO')[ms.top100$Description, ]
pvals<-format.ms(ms = ms, val_col = 'Log.q.value.')[ms.top100$Description, ]

pvals<-as.data.frame(pvals)
pvals[['Enrichment.Term']]<-rownames(pvals)
freq<-as.data.frame(freq)
freq[['Enrichment.Term']]<-rownames(freq)

pvals<-melt(pvals, id.vars = 'Enrichment.Term', value.name = 'log10p', variable.name = 'Condition')
freq<-melt(freq, id.vars = 'Enrichment.Term', value.name = 'Frequency', variable.name = 'Condition')
pvals[['log10p']] <- -as.numeric(pvals[['log10p']]) 

viz.df <- cbind(pvals, Frequency = as.numeric(freq$Frequency))
In [404]:
h_ = 20
w_ = 25
options(repr.plot.height=h_, repr.plot.width=w_)

green_hex = brewer.pal(n = 11, name ='RdYlGn')[[11]]
yellow_hex = brewer.pal(n = 11, name = 'RdYlGn')[[6]]
red_hex = brewer.pal(n = 11, name = 'RdYlGn')[[1]]

lower_q = min(viz.df$log10p[!(is.na(viz.df$log10p))])
upper_q = max(viz.df$log10p[!(is.na(viz.df$log10p))])
middle_q = mean(c(lower_q, upper_q)) # median(viz.df$log10p[!(is.na(viz.df$log10p))]) # 
# middle_q = 7.5

g<-ggplot(data = viz.df, aes(x = Condition, y = Enrichment.Term, color = log10p, size = Frequency)) + 
geom_point() + 
# scale_color_gradient2(low = green_hex, mid = yellow_hex, high = red_hex, name = '-log10(q-val)', 
#                      limits = c(lower_q, upper_q), midpoint = middle_q) + 
scale_colour_gradientn(colours = rev(c("darkred", "orange", "yellow")), name = '-log10(q-val)')+
scale_size_continuous(range = c(1,15)) +
theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
                  text = element_text(size = 20))

g
Warning message:
“Removed 141 rows containing missing values (`geom_point()`).”

Co-Expression Analysis of DE Enriched Genes¶

First, we need to group genes into co-expression clusters. To do so, we will run hdWGCNA on the entire CD8+ dataset, then look at how the DE genes identified in the above analysis group by these co-expression clusters

Running on just the subset of DE genes or cell subsets in the branches is too underpowered for hdWGCNA

We run this on the raw counts and then normalize the metacells as suggested by the tutorial

In [387]:
wgcna.so <- abc.cd8s.all
DefaultAssay(wgcna.so)<-'RNA'

wgcna.so <- hdWGCNA::SetupForWGCNA(seurat_obj =  wgcna.so,
                                   gene_select = "fraction", # the gene selection approach
                                  fraction = 0.05,
                                wgcna_name = 'CD8.wgcna')

# construct metacells  in each group
wgcna.so <- MetacellsByGroups(
  seurat_obj = wgcna.so,
  group.by = c("Cell.Type.Level2", "orig.ident"), # specify the columns in seurat_obj@meta.data to group by
  reduction = 'pca', # select the dimensionality reduction to perform KNN on
  k = 20, # nearest-neighbors parameter
  max_shared = 5, # maximum number of shared cells between two metacells
  ident.group = 'Cell.Type.Level2', # set the Idents of the metacell seurat object
    assay = 'RNA', 
    slot = 'counts'
)

wgcna.so <- NormalizeMetacells(wgcna.so)

# # normalize/batch correct the metacells
# suppressMessages({
#     suppressWarnings({
#     wgcna.so <- NormalizeMetacells(wgcna.so)
#     wgcna.so <- ScaleMetacells(wgcna.so, features=VariableFeatures(abc.cd8s.all))
#     wgcna.so <- RunPCAMetacells(wgcna.so, features=VariableFeatures(abc.cd8s.all))
#     wgcna.so <- RunHarmonyMetacells(wgcna.so, group.by.vars='orig.ident')
#     wgcna.so <- RunUMAPMetacells(wgcna.so, reduction='harmony', dims=1:15)
#     })
# })

# p1 <- DimPlotMetacells(wgcna.so, group.by='Cell.Type.Level2') + umap_theme() + ggtitle("Cell Type")
# p2 <- DimPlotMetacells(wgcna.so, group.by='orig.ident') + umap_theme() + ggtitle("Sample")

# p1 | p2

wgcna.so <- hdWGCNA::SetDatExpr(seurat_obj = wgcna.so,
                                    group_name = as.character(unique(wgcna.so$Cell.Type.Level2)), 
                                    group.by='Cell.Type.Level2', 
                                    use_metacells=TRUE,
                                    assay = 'RNA', slot = 'data' # use the log-normalized counts
                                   )
Warning message in MetacellsByGroups(seurat_obj = wgcna.so, group.by = c("Cell.Type.Level2", :
“Removing the following groups that did not meet min_cells: CD8+ TE/Ex#ABC, CD8+ TE/Ex#DT_Veh, CD8+ TE/Ex#UNTR, CD8+ TEA_1#DT_Veh, CD8+ TEA_1#UNTR, CD8+ TEA_2#ABC, CD8+ TEA_2#DT_ABC, CD8+ TEA_2#DT_Veh, CD8+ TEA_2#UNTR, CD8+ TEx prec#ABC, CD8+ TEx prec#DT_Veh, CD8+ TEx prec#UNTR, CD8+ TEx#ABC, CD8+ TEx#DT_Veh, CD8+ TEx#UNTR, CD8+ TN/EA-ISG#DT_Veh, CD8+ TN/EA-ISG#UNTR”
In [390]:
# Test different soft powers:
wgcna.so <- TestSoftPowers(
  wgcna.so,
  networkType = 'signed' # you can also use "unsigned" or "signed hybrid"
)

# plot the results:
plot_list <- hdWGCNA::PlotSoftPowers(wgcna.so)

# assemble with patchwork
patchwork::wrap_plots(plot_list, ncol=2)
pickSoftThreshold: will use block size 5219.
 pickSoftThreshold: calculating connectivity for given powers...
   ..working on genes 1 through 5219 of 8572
   ..working on genes 5220 through 8572 of 8572
   Power SFT.R.sq slope truncated.R.sq  mean.k. median.k.  max.k.
1      1  0.34500 19.40          0.796 4.34e+03  4.34e+03 4620.00
2      2  0.00781 -1.44          0.944 2.22e+03  2.21e+03 2650.00
3      3  0.27200 -6.09          0.780 1.15e+03  1.13e+03 1600.00
4      4  0.71900 -7.22          0.862 6.02e+02  5.79e+02 1010.00
5      5  0.92600 -6.98          0.966 3.20e+02  3.01e+02  666.00
6      6  0.94800 -5.85          0.970 1.72e+02  1.57e+02  454.00
7      7  0.95600 -4.82          0.978 9.38e+01  8.28e+01  320.00
8      8  0.95700 -4.12          0.979 5.21e+01  4.38e+01  233.00
9      9  0.95800 -3.60          0.981 2.95e+01  2.33e+01  173.00
10    10  0.95100 -3.22          0.979 1.70e+01  1.25e+01  132.00
11    12  0.91100 -2.76          0.965 6.13e+00  3.65e+00   81.10
12    14  0.90700 -2.34          0.972 2.47e+00  1.10e+00   53.00
13    16  0.90000 -2.05          0.973 1.13e+00  3.37e-01   36.30
14    18  0.85700 -1.88          0.925 5.80e-01  1.05e-01   25.80
15    20  0.85700 -1.71          0.926 3.31e-01  3.37e-02   18.80
16    22  0.92500 -1.52          0.981 2.06e-01  1.10e-02   14.10
17    24  0.95800 -1.40          0.990 1.36e-01  3.67e-03   10.70
18    26  0.97300 -1.31          0.983 9.49e-02  1.25e-03    8.31
19    28  0.98000 -1.28          0.987 6.85e-02  4.30e-04    7.02
20    30  0.98700 -1.25          0.994 5.10e-02  1.51e-04    6.02
  Power    SFT.R.sq     slope truncated.R.sq   mean.k. median.k.    max.k.
1     1 0.344777420 19.393750      0.7955631 4336.2207 4337.3481 4624.2984
2     2 0.007808727 -1.436221      0.9442834 2219.6247 2205.2754 2653.7066
3     3 0.272279141 -6.086219      0.7800630 1149.3675 1126.7147 1602.5404
4     4 0.718665099 -7.217655      0.8624287  602.2537  579.3024 1012.2800
5     5 0.926057500 -6.976546      0.9658155  319.5523  300.7122  665.8585
6     6 0.947885285 -5.848773      0.9704821  171.8724  157.2604  454.2945
In [391]:
# construct co-expression network:
wgcna.so <- ConstructNetwork(
  wgcna.so, soft_power=5,
  setDatExpr=FALSE,
  overwrite_tom = TRUE
)
modules <- GetModules(wgcna.so)
 Calculating consensus modules and module eigengenes block-wise from all genes
 Calculating topological overlaps block-wise from all genes
   Flagging genes and samples with too many missing values...
    ..step 1
    TOM calculation: adjacency..
    ..will use 20 parallel threads.
     Fraction of slow calculations: 0.000000
    ..connectivity..
    ..matrix multiplication (system BLAS)..
    ..normalization..
    ..done.
 ..Working on block 1 .
 ..Working on block 1 .
 ..merging consensus modules that are too close..

Number of genes assigned to each co-expression cluster (grey means they were not assigned)

In [407]:
table(modules$module)
     grey     green turquoise    yellow     brown      blue       red     black 
     6160       172       703       197       530       547       110        94 
     pink 
       59 

The distribution of these within the DE genes identified during the trajectory analysis:

In [423]:
gene.map<-modules$module
names(gene.map)<-rownames(modules)
de.res[['WGCNA.cluster']]<-gene.map[de.res$gene.id]
de.res[is.na(de.res$WGCNA.cluster), 'WGCNA.cluster'] <- 'grey'

table(de.res$WGCNA.cluster)
     grey     green turquoise    yellow     brown      blue       red     black 
      503         0        11         3        18        33        27         6 
     pink 
        5 

Compare this to all the DE genes:

In [495]:
table(de.res[c('Condition', 'WGCNA.cluster')])
               WGCNA.cluster
Condition       grey green turquoise yellow brown blue red black pink
  Branch1.cells   78     0         1      1     3    4   5     1    1
  Branch3.cells  137     0         3      0     3    7   6     1    1
  Path1A.cells   128     0         1      0     3    5   5     2    1
  Path1B.cells   118     0         4      1     3    7   6     1    1
  Path2.cells     42     0         2      1     6   10   5     1    1

Enrichment of co-expression clusters in various trajectory path DE genes¶

Next, let's see whether any of the conditions are enriched for any of the coexpression modules (disregarding "grey" genes not assigned to a pathway):

In [904]:
# p.adjust = BH corrected -- # https://github.com/YuLab-SMU/clusterProfiler/issues/124
# GeneRatio -- ratio of input genes in specific pathway vs input genes in all pathways https://www.biostars.org/p/220465/

# can improve visualization by directly manipulating the @result of each output
# if doing this, change qvalueCutoff to 1 and directly filter

visualize_ora<-function(cp.out, factor.name, sig.thresh = 0.1, top_n_terms = NULL){
    viz.df<-cp.out@result
    
    if (is.null(top_n_terms)){
        top_n_terms = dim(viz.df)[[1]]
    }
    
    viz.df[['significant']]<-paste0('fdr > ', sig.thresh)
    viz.df[viz.df$p.adjust <= sig.thresh, 'significant']<-paste0('fdr', ' \u2264 ',  sig.thresh)

    viz.df[['significant']]<-factor(viz.df[['significant']], 
                                    levels = c(paste0('fdr', ' \u2264 ',  sig.thresh), paste0('fdr > ', sig.thresh)))
    viz.df[['log.fdr']]<- -log10(viz.df$p.adjust)
    viz.df[['GeneRatio2']]<-unlist(unname(sapply(viz.df$GeneRatio, function(x) eval(parse(text=x)))))

    viz.df<-viz.df[with(viz.df, order(significant, -GeneRatio2, -log.fdr, -Count)), ]

    viz.df<-viz.df[1:top_n_terms, ]
    viz.df[['ID']]<-factor(viz.df[['ID']], 
                                    levels = rev(viz.df[['ID']]))

    green_hex = brewer.pal(n = 11, name ='RdYlGn')[[11]]
    yellow_hex = brewer.pal(n = 11, name = 'RdYlGn')[[6]]
    red_hex = brewer.pal(n = 11, name = 'RdYlGn')[[1]]

    lower_q = min(viz.df$log.fdr[!(is.na(viz.df$log.fdr))])
    upper_q = max(viz.df$log.fdr[!(is.na(viz.df$log.fdr))])
    middle_q = mean(c(lower_q, upper_q))#median(viz.df$log.fdr[!(is.na(viz.df$log.fdr))]) 
    # middle_q = 7.5

    g<-ggplot(data = viz.df, aes(x = GeneRatio2, y = ID, color = log.fdr, size = Count, shape = significant)) + 
    geom_point() + 
    ylab('') + xlab('GeneRatio') + ggtitle(factor.name) +
#     scale_color_gradient2(low = green_hex, mid = yellow_hex, high = red_hex, name = '-log10(fdr)', 
#                          limits = c(lower_q, upper_q), midpoint = middle_q) + 
    scale_colour_gradientn(colours = rev(c("darkred", "orange", "yellow")), name = '-log10(fdr)')+
    scale_shape_manual(values = c(16,4), drop = FALSE)+
    scale_size_continuous(range = c(4,12))+
    theme_bw() + theme(#axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
                      text = element_text(size = 15), plot.title = element_text(hjust = 0.5))


    return(g)
}
                                                 
visualize_ora_all<-function(ora_res, sig.thresh = 0.1, top_n_terms = 15)     {
                             
    viz.df <- data.frame(matrix(ncol = 10))
    colnames(viz.df)<-c('ID','Description','GeneRatio','BgRatio','pvalue','p.adjust','qvalue','geneID','Count', 'Comparison')

    for (comp.name in names(ora_res))({
        viz.df.sub<-ora_res[[comp.name]]@result
        viz.df.sub[['Comparison']]<-comp.name
        viz.df<-rbind(viz.df, viz.df.sub)
    })

    viz.df<-viz.df[-1,]
    rownames(viz.df)<-1:dim(viz.df)[[1]]

    viz.df[['significant']]<-paste0('fdr > ', sig.thresh)
    viz.df[viz.df$p.adjust <= sig.thresh, 'significant']<-paste0('fdr', ' \u2264 ',  sig.thresh)

    viz.df[['significant']]<-factor(viz.df[['significant']], 
                                    levels = c(paste0('fdr', ' \u2264 ',  sig.thresh), paste0('fdr > ', sig.thresh)))
    viz.df[['log.fdr']]<- -log10(viz.df$p.adjust)
    viz.df[['GeneRatio2']]<-unlist(unname(sapply(viz.df$GeneRatio, function(x) eval(parse(text=x)))))

    viz.df<-viz.df[with(viz.df, order(significant, -GeneRatio2, -log.fdr, -Count)), ]

    n_terms<-top_n_terms
    unique_n_terms<-length(unique(viz.df[1:n_terms, 'ID']))
    while ((unique_n_terms < top_n_terms) & (n_terms < dim(viz.df)[[1]])){
        n_terms <- n_terms + 1
        unique_n_terms<-length(unique(viz.df[1:n_terms, 'ID']))
    }
    viz.df<-viz.df[1:n_terms, ]

    green_hex = brewer.pal(n = 11, name ='RdYlGn')[[11]]
    yellow_hex = brewer.pal(n = 11, name = 'RdYlGn')[[6]]
    red_hex = brewer.pal(n = 11, name = 'RdYlGn')[[1]]

    lower_q = min(viz.df$log.fdr[!(is.na(viz.df$log.fdr))])
    upper_q = max(viz.df$log.fdr[!(is.na(viz.df$log.fdr))])
    middle_q = mean(c(lower_q, upper_q))#median(viz.df$log.fdr[!(is.na(viz.df$log.fdr))]) 
    # middle_q = 7.5

    g<-ggplot(data = viz.df, aes(x = Comparison, y = ID, color = log.fdr, size = GeneRatio2, shape = significant)) + 
    geom_point() + 
    ylab('') + xlab('') + ggtitle('') +
    #     scale_color_gradient2(low = green_hex, mid = yellow_hex, high = red_hex, name = '-log10(fdr)', 
    #                          limits = c(lower_q, upper_q), midpoint = middle_q) + 
    scale_colour_gradientn(colours = rev(c("darkred", "orange", "yellow")), name = '-log10(fdr)')+
    scale_shape_manual(values = c(16,4), drop = FALSE)+
    scale_size_continuous(range = c(4,12))+
    theme_bw() + theme(#axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), 
                      text = element_text(size = 20), plot.title = element_text(hjust = 0.5))
    return(g)
}
In [905]:
term2gene <- modules[c('color', 'gene_name')]
colnames(term2gene) <- c('Pathway', 'gene.id')

term2gene <- term2gene[term2gene$Pathway != 'grey', ] # ignore the genes not assigned to a pathway
term2gene$Pathway <- factor(term2gene$Pathway)
rownames(term2gene) <- 1:dim(term2gene)[[1]]

We lower the minimum gene set size to 2 since there are so few genes:

In [906]:
ora_res <- list()
for (cond.name in unique(de.res$Condition)){

    dr <- de.res[de.res$Condition == cond.name, ]
    dr <- dr[dr$WGCNA.cluster != 'grey', ] # ignore genes not assigned to a pathway
    dr <- dr$gene.id
    
    ora_res[[cond.name]]<-clusterProfiler::enricher(dr, TERM2GENE = term2gene, 
                                                     pAdjustMethod = 'BH', 
                                                      pvalueCutoff = 1, # will filter later
                                                      qvalueCutoff = 1, # will filter later
                                                    minGSSize = 2, 
                                                    maxGSSize = 1000
                                                     )
}
In [907]:
# visualize
dotplot_list<-c()
for (fn in names(ora_res)){
    cp.out<-ora_res[[fn]]
    g<-visualize_ora(cp.out, fn, top_n_terms = length(unique(term2gene$Pathway)))
    
    dotplot_list[[fn]]<-g

}
In [908]:
h_ = 10
w_ = 35
options(repr.plot.height=h_, repr.plot.width=w_)

suppressWarnings({
    g<-cowplot::plot_grid(dotplot_list[[1]], dotplot_list[[2]], dotplot_list[[3]], dotplot_list[[4]], dotplot_list[[5]], 
                   ncol = 3)
})

g
In [909]:
h_ = 10
w_ = 20
options(repr.plot.height=h_, repr.plot.width=w_)

g<-visualize_ora_all(ora_res, sig.thres = 0.1, top_n_terms = 20)
g

Oddly enough, they are all enriched for the same co-expression cluster - the 'red' one

Next, let's visualize these co-expression clusters (median of the integraed, scaled counts across genes per cell) on the umap in the subregions for each trajectory analysis:

In [872]:
graph_branch_module<-function(cluster.label, ind.scale = T, scale = NULL,
                              de.genes, so, barcodes.branch, viz.df.0, assay, slot){
    de.genes.cluster<-de.genes[de.genes$WGCNA.cluster == cluster.label, 'gene.id']
    if (length(de.genes.cluster) > 0){
        expr<-GetAssayData(object = so, slot = slot, assay = assay)[de.genes.cluster, barcodes.branch]
        
        n.genes<-length(de.genes.cluster)
        
        viz.df<-viz.df.0[barcodes.branch, ]

        if (length(de.genes.cluster) > 1){
            viz.df[['median.expression']]<-colMedians(expr)
        }else{
            viz.df[['median.expression']]<-unlist(unname(expr))
        }
        
        if (ind.scale){
            lower_q = min(viz.df$median.expression[!(is.na(viz.df$median.expression))])
            upper_q = max(viz.df$median.expression[!(is.na(viz.df$median.expression))])
            middle_q = mean(c(lower_q, upper_q))
        }else{
            lower_q = scale[1]
            upper_q = scale[2]
            middle_q = scale[3]
        }
        
        g<-ggplot(viz.df, aes(x = UMAP_1, y = UMAP_2, color = median.expression)) + geom_point() +
        scale_color_gradient2(low = green_hex, mid = yellow_hex, high = red_hex, name = 'median expression', 
                             limits = c(lower_q, upper_q), midpoint = middle_q)+
        ggtitle(paste0('WGCNA Cluster: ', cluster.label, ' | no. genes: ', n.genes))+
        theme_bw()

        return(g)
    }
    else{
        return(NULL)
    }
}
In [875]:
slot = 'scale.data'
assay = 'integrated'

umap.coords<-abc.cd8s.all[["umap"]]@cell.embeddings
all.paths.viz<-list()

suppressWarnings({
for (branch.label in names(branch.cells.list)){

        viz.df.0<-as.data.frame(umap.coords)
        barcodes.branch<-branch.cells.list[[branch.label]]
        de.genes<-de.res[de.res$Condition == branch.label, ]

        # all genes
        expr<-GetAssayData(object = abc.cd8s.all, slot = slot, assay = assay)[de.genes$gene.id, barcodes.branch]
        n.genes<-dim(expr)[[1]]

        viz.df<-viz.df.0
        viz.df[['median.expression']]<-NA
        viz.df[barcodes.branch, 'median.expression']<-colMedians(expr)

        lower_q = min(viz.df$median.expression[!(is.na(viz.df$median.expression))])
        upper_q = max(viz.df$median.expression[!(is.na(viz.df$median.expression))])
        middle_q = mean(c(lower_q, upper_q))

        g.all<-ggplot(viz.df, aes(x = UMAP_1, y = UMAP_2, color = median.expression)) + geom_point() +
        scale_color_gradient2(low = green_hex, mid = yellow_hex, high = red_hex, name = 'median expression', 
                             limits = c(lower_q, upper_q), midpoint = middle_q)+
        ggtitle(paste0(branch.label, ' - All DE genes', ' | no. genes: ', n.genes))+
        theme_bw()

        g.red<-graph_branch_module(cluster.label = 'red',
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.blue<-graph_branch_module(cluster.label = 'blue',
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.brown<-graph_branch_module(cluster.label = 'brown',
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.pink<-graph_branch_module(cluster.label = 'pink',
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.turquoise<-graph_branch_module(cluster.label = 'turquoise',
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.black<-graph_branch_module(cluster.label = 'black',
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.yellow<-graph_branch_module(cluster.label = 'yellow',
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)


        all.paths.viz[[branch.label]]<-cowplot::plot_grid(g.all, g.red, g.blue, g.brown, g.pink, g.turquoise, g.black, g.yellow, 
                                ncol = 1, title = branch.label)

    }
})

Each on different color scales:

In [876]:
h_ = 20
w_ = 30
options(repr.plot.height=h_, repr.plot.width=w_)

g.final<-cowplot::plot_grid(all.paths.viz[[1]], all.paths.viz[[2]], all.paths.viz[[3]], all.paths.viz[[4]], 
                            all.paths.viz[[5]], 
                            ncol = length(all.paths.viz))

g.final
In [900]:
slot = 'scale.data'
assay = 'integrated'

umap.coords<-abc.cd8s.all[["umap"]]@cell.embeddings
all.paths.viz<-list()

suppressWarnings({
for (branch.label in names(branch.cells.list)){

        viz.df.0<-as.data.frame(umap.coords)
        barcodes.branch<-branch.cells.list[[branch.label]]
        de.genes<-de.res[de.res$Condition == branch.label, ]

        # all genes
        expr<-GetAssayData(object = abc.cd8s.all, slot = slot, assay = assay)[de.genes$gene.id, barcodes.branch]
        n.genes<-dim(expr)[[1]]

        viz.df<-viz.df.0
        viz.df[['median.expression']]<-NA
        viz.df[barcodes.branch, 'median.expression']<-colMedians(expr)

        lower_q = -2
        upper_q = 2
        middle_q = 0

        g.all<-ggplot(viz.df, aes(x = UMAP_1, y = UMAP_2, color = median.expression)) + geom_point() +
        scale_color_gradient2(low = green_hex, mid = yellow_hex, high = red_hex, name = 'median expression', 
                             limits = c(lower_q, upper_q), midpoint = middle_q)+
        ggtitle(paste0(branch.label, ' - All DE genes', ' | no. genes: ', n.genes))+
        theme_bw()

        g.red<-graph_branch_module(cluster.label = 'red', ind.scale = F, scale = c(lower_q, upper_q, middle_q),
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.blue<-graph_branch_module(cluster.label = 'blue', ind.scale = F, scale = c(lower_q, upper_q, middle_q),
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.brown<-graph_branch_module(cluster.label = 'brown', ind.scale = F, scale = c(lower_q, upper_q, middle_q),
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.pink<-graph_branch_module(cluster.label = 'pink', ind.scale = F, scale = c(lower_q, upper_q, middle_q),
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.turquoise<-graph_branch_module(cluster.label = 'turquoise', ind.scale = F, scale = c(lower_q, upper_q, middle_q),
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.black<-graph_branch_module(cluster.label = 'black', ind.scale = F, scale = c(lower_q, upper_q, middle_q),
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)
        g.yellow<-graph_branch_module(cluster.label = 'yellow', ind.scale = F, scale = c(lower_q, upper_q, middle_q),
                                   de.genes = de.genes, so = abc.cd8s.all, barcodes.branch = barcodes.branch, viz.df.0 = viz.df.0, assay = assay, slot = slot)


        all.paths.viz[[branch.label]]<-cowplot::plot_grid(g.all, g.red, g.blue, g.brown, g.pink, g.turquoise, g.black, g.yellow, 
                                ncol = 1, title = branch.label)

    }
})

All on the same color scale:

In [901]:
h_ = 20
w_ = 30
options(repr.plot.height=h_, repr.plot.width=w_)

g.final<-cowplot::plot_grid(all.paths.viz[[1]], all.paths.viz[[2]], all.paths.viz[[3]], all.paths.viz[[4]], 
                            all.paths.viz[[5]], 
                            ncol = length(all.paths.viz))

g.final